In [1]:
import json
import numpy as np
# NOTE(review): the original line read `from ..src.data`, which is a
# SyntaxError (a relative `from` import needs an `import` clause). The
# project-local data module presumably provides `dataset`,
# `get_temporal_predictions_2` and the *_predictions_path variables used
# below — confirm the actual module layout.
from ..src import data
import pprint  # used below to pretty-print the temporal predictions

In [29]:
def import_labels(f):
    ''' Read all class labels from an open file handle.

    Each line is expected to be "<index>\t<label>\n", with indices starting
    at 0 and strictly sequential; the label's position in the returned list
    is its class id.

    Args:
        f: readable, line-iterable file-like object.
    Returns:
        List of label strings ordered by index.
    Raises:
        AssertionError: if the line indices are not 0, 1, 2, ... in order.
    '''
    labels = []
    # Iterate the file directly instead of readlines() + manual counter.
    for expected_idx, line in enumerate(f):
        parts = line.split('\t')
        assert int(parts[0]) == expected_idx
        # Drop the trailing newline (same as the original split('\n')[0]).
        labels.append(parts[1].split('\n')[0])
    return labels

def to_categorical(y, nb_classes=None):
    ''' Convert a class vector (integers from 0 to nb_classes-1) to a binary
    class matrix, for use with categorical_crossentropy.

    Args:
        y: sequence (or 1-D array) of non-negative integer class ids.
        nb_classes: total number of classes; inferred as max(y)+1 when None.
    Returns:
        float ndarray of shape (len(y), nb_classes) with a single 1. per row.
    '''
    y = np.asarray(y, dtype=int)
    # `is None` instead of truthiness: an explicit nb_classes=0 must not be
    # silently replaced by the inferred value.
    if nb_classes is None:
        # Guard the empty case — np.max raises on an empty array.
        nb_classes = int(y.max()) + 1 if y.size else 0
    Y = np.zeros((len(y), nb_classes))
    # Vectorized one-hot assignment instead of a Python loop.
    Y[np.arange(len(y)), y] = 1.
    return Y

def generate_output(video_info, labels, length=16):
    ''' Given the info of the video, generate a vector of classes corresponding
    to the output for each clip of the video whose features have been extracted.

    Args:
        video_info: dict with 'num_frames', 'duration' (seconds) and
            'annotations', each annotation holding 'segment' ([start, end]
            in seconds) and 'label'.
        labels: list of label strings; a label's position is its class id
            (index 0 is treated as the background class).
        length: clip length in frames; clips are consecutive and
            non-overlapping.
    Returns:
        List with one class id per clip (0 for background clips).
    '''
    nb_frames = video_info['num_frames']
    last_first_frame = nb_frames - length + 1

    start_frames = range(0, last_first_frame, length)

    # Assign a label to every frame of the video ('none' = background).
    outputs = ['none'] * nb_frames
    for i in range(nb_frames):
        # Map the frame index to the temporal scale (seconds)
        t = i / float(nb_frames) * video_info['duration']
        for annotation in video_info['annotations']:
            if annotation['segment'][0] <= t <= annotation['segment'][1]:
                outputs[i] = annotation['label']
                break

    instances = []
    for start_frame in start_frames:
        # Obtain the label for this instance and then its output
        outs = outputs[start_frame:start_frame + length]

        # Find the dominant non-background label inside this clip.
        # BUGFIX: the original read a `label` variable left over from the
        # frame loop above — i.e. the label of the LAST matched annotation —
        # for every clip, which is wrong when a video has annotations with
        # different labels, and raised NameError when no frame matched any
        # annotation at all.
        best_label, best_count = None, 0
        for candidate in set(outs):
            if candidate == 'none':
                continue
            count = outs.count(candidate)
            if count > best_count:
                best_label, best_count = candidate, count

        # The clip takes the activity class only when the activity covers at
        # least half of its frames; otherwise it is background (class 0).
        if best_label is not None and best_count >= length / 2:
            instances.append(labels.index(best_label))
        else:
            instances.append(0)

    return instances

In [5]:
# Load the class label list; a label's line index is its class id.
# (Presumably ActivityNet-style labels — 200 classes are used below; confirm.)
with open("../dataset/labels.txt", "r") as f:
    labels = import_labels(f)

In [10]:
# Load per-video metadata (duration, num_frames, annotations, ...) and pick
# a single validation video to sanity-check the output generation.
with open("../dataset/videos.json", "r") as f:
    videos_info = json.load(f)
video_info = videos_info['Uw_0h2UrfyY']
print(video_info)


{'subset': 'validation', 'num_frames': 4157, 'url': 'https://www.youtube.com/watch?v=Uw_0h2UrfyY', 'duration': 139.04, 'resolution': '426x240', 'annotations': [{'segment': [19.07183775351014, 117.91560686427458], 'label': 'Ballet'}]}

In [31]:
# One class id per non-overlapping 16-frame clip, then one-hot encode.
# NOTE(review): nb_classes=200 is a magic number — presumably the total
# class count including background; confirm against len(labels).
instances = generate_output(video_info, labels, length=16)
print(instances)
Y = to_categorical(instances, nb_classes=200)
print(Y[100:300])


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 1.  0.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]]

The cells above reproduce the old test with the old code implementation, as a reference for the new dataset-based pipeline below.


In [2]:
# Look up the same video through the project's dataset abstraction.
# NOTE(review): `dataset` is not defined anywhere in the notebook as shown —
# presumably it comes from the project-local import at the top; verify that
# this cell survives Restart & Run All.
video = None
for v in dataset.get_subset_videos('validation'):
    if v.video_id == 'Uw_0h2UrfyY':
        video = v
print(video.serialize())


{'subset': 'validation', 'resolution': '426x240', 'duration': 139.04, 'url': 'https://www.youtube.com/watch?v=Uw_0h2UrfyY', 'annotations': [{'segment': [19.07183775351014, 117.91560686427458], 'label': 'Ballet'}], 'num_frames': 4157}

In [3]:
# Split the video into 16-frame clips (second argument 0 — presumably the
# overlap; confirm against get_video_instances' signature) and collect the
# per-clip ground-truth class ids produced by the dataset code.
video.get_video_instances(16, 0)
# Fixed variable typo: ground_trouth -> ground_truth.
ground_truth = np.array([instance.output for instance in video.instances])
print(ground_truth)


[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177 177
 177 177 177 177   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0]

In [4]:
# Build paths to the saved per-clip predictions for this video.
# NOTE(review): detection_predictions_path / classification_predictions_path
# are not defined in the notebook as shown — presumably set in a missing
# config cell; verify before Restart & Run All.
detection_prediction_path = detection_predictions_path + video.video_id + '.npy'
classification_prediction_path =  classification_predictions_path + video.video_id + '.npy'

# class_prediction: one class id per clip; detection_prediction: per-clip
# binary activity/background flag (see the printed outputs below).
class_prediction = np.load(classification_prediction_path)
detection_prediction = np.load(detection_prediction_path)

print(class_prediction)
print(detection_prediction)


[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0 105 105 105 105 105   0   0   0   0   0   0   0   0 105 105 105 105
 105 105 105 105 105 105 105 105 105   0   0 105 105 105 105 105 105 105
 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105
 105 105 105 105   0 105 105   0 105   0   0   0   0   0   0   0   0   0
   0 177 177 177   0   0 177   0   0 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177   0   0   0   0 177   0 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177   0 177   0   0 177 177 177 177
 177 177 177 177 177   0   0 177 177 105 105 177 177 105 105 105 105 177
 177 177 105 177 177 177 177 177 177 177 177 177 177 177 177 105 105 105
 105 105 105   0   0 105   0 177 177 177 177 177   0 177 177 177 177 177
 177 177 177 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105
 105 105   0   0   0 105 105 105 105 105 105   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0 105 177 177 177 105   0   0   0   0   0
   0   0   0   0   0   0   0]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

In [5]:
# Keep the classifier's class id only where the detector flags "activity";
# everywhere else force background (0). Vectorized replacement of the
# original element-wise Python loop over the numpy array.
mix = np.where(detection_prediction == 1, class_prediction, 0).astype(np.int64)

print(mix)

# Collapse the per-clip labels into temporal segments (in seconds).
# NOTE(review): `get_temporal_predictions_2` is not defined in the notebook
# as shown — presumably imported from the project code; confirm.
prediction = get_temporal_predictions_2(mix, fps=video.fps)
pprint.pprint(prediction)


[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0 105 105 105 105 105   0   0   0   0   0   0   0   0 105 105 105 105
 105 105 105 105 105 105 105 105 105   0   0 105 105 105 105 105 105 105
 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105
 105 105 105 105   0 105 105   0 105   0   0   0   0   0   0   0   0   0
   0 177 177 177   0   0 177   0   0 177 177 177 177 177 177 177 177 177
 177 177 177 177 177 177 177   0   0   0   0 177   0 177 177 177 177 177
 177 177 177 177 177 177 177 177 177 177   0 177   0   0 177 177 177 177
 177 177 177 177 177   0   0 177 177 105 105 177 177 105 105 105 105 177
 177 177 105 177 177 177 177 177 177 177 177 177 177 177 177 105 105 105
 105 105 105   0   0 105   0 177 177 177 177 177   0 177 177 177 177 177
 177 177 177 105 105 105 105 105 105 105 105 105 105 105 105 105 105 105
 105 105   0   0   0 105 105 105 105 105 105   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0 105 177 177 177 105   0   0   0   0   0
   0   0   0   0   0   0   0]
[{'label': 105,
  'scores': 1.0,
  'segment': [10.167948039451527, 12.843723839307192]},
 {'label': 105,
  'scores': 1.0,
  'segment': [17.124965119076254, 24.081982198700985]},
 {'label': 105,
  'scores': 1.0,
  'segment': [25.15229251864325, 40.67179215780611]},
 {'label': 105,
  'scores': 1.0,
  'segment': [41.20694731777724, 42.27725763771951]},
 {'label': 105,
  'scores': 1.0,
  'segment': [42.81241279769064, 43.347567957661774]},
 {'label': 177, 'scores': 1.0, 'segment': [48.6991195573731, 50.3045850372865]},
 {'label': 177,
  'scores': 1.0,
  'segment': [51.374895357228766, 51.9100505171999]},
 {'label': 177,
  'scores': 1.0,
  'segment': [52.980360837142165, 61.5428433966803]},
 {'label': 177,
  'scores': 1.0,
  'segment': [63.68346403656483, 64.21861919653595]},
 {'label': 177, 'scores': 1.0, 'segment': [64.7537743565071, 72.7811017560741]},
 {'label': 177,
  'scores': 1.0,
  'segment': [73.31625691604522, 73.85141207601636]},
 {'label': 177,
  'scores': 1.0,
  'segment': [74.92172239595862, 79.73811883569881]},
 {'label': 177,
  'scores': 0.59375,
  'segment': [80.80842915564108, 97.93339427471734]},
 {'label': 105,
  'scores': 0.40625,
  'segment': [80.80842915564108, 97.93339427471734]},
 {'label': 105,
  'scores': 1.0,
  'segment': [99.00370459465961, 99.53885975463074]},
 {'label': 177,
  'scores': 1.0,
  'segment': [100.07401491460188, 102.74979071445753]},
 {'label': 105,
  'scores': 0.68000000000000005,
  'segment': [103.28494587442867, 116.66382487370699]},
 {'label': 177,
  'scores': 0.32000000000000001,
  'segment': [103.28494587442867, 116.66382487370699]},
 {'label': 105,
  'scores': 1.0,
  'segment': [118.26929035362039, 121.48022131344719]},
 {'label': 177,
  'scores': 0.59999999999999998,
  'segment': [129.5075487130142, 132.18332451286986]},
 {'label': 105,
  'scores': 0.40000000000000002,
  'segment': [129.5075487130142, 132.18332451286986]}]

In [ ]: